#install.packages("haven")
library(haven)
#install.packages("psych")
library(psych)
#install.packages("expss")
library(expss)
#install.packages("texreg")
library(texreg)
#install.packages("xtable")
library(xtable)
#install.packages("ggplot2")
library(ggplot2)
#install.packages("estimatr")
library(estimatr)
#install.packages("plyr")
library(plyr)
#install.packages("dplyr")
library(dplyr)
#install.packages("forcats")
library(forcats)
# install.packages('gridExtra')
library(gridExtra)
# install.packages('grid')
library(grid)
# install.packages('stringr')
library(stringr)

setwd(dir = "~/data/dhbiden/")
#setwd(dir = "c:/data/dhbiden/")

load("Biden.RData")

# Let's fix that variable name
names(dhb)[names(dhb) == 'CTermLim_'] <- 'CTermLim'
names(dhb)[names(dhb) == 'respondent_party'] <- 'ForcedParty'
names(dhb)[names(dhb) == 'pid1'] <- 'PartyID'

# Check work
names(dhb)

# Recode Party ID
dhb$PartyID <- as.character(dhb$PartyID)
dhb$PartyID[dhb$PartyID == "1"] <- "Republican"
dhb$PartyID[dhb$PartyID == "2"] <- "Democrat"
dhb$PartyID[dhb$PartyID == "3"] <- "Independent"
dhb$PartyID[dhb$PartyID == "4"] <- "Other"

# Set the factor levels (otherwise it will be alphabetical)
dhb$PartyID <- factor(dhb$PartyID, levels = c("Democrat", "Republican", "Independent", "Other"))

# Check Work (There is still that empty field for Forced_ID) Let's check on this later.
table(dhb$PartyID)
table(dhb$ForcedParty, dhb$PartyID)

# Add a year variable with (2021) not all 16 Dem questions are missing. Otherwise make it NA for subsequent filtering
dhb$year <- ifelse(rowSums(is.na(dhb[, 4:19])) == 16, NA, 2021)

# Remove cases with missing on all
dhb <- dhb[which(dhb$year==2021), ]
describe(dhb)

# Now is later. Empty field is gone.
table(dhb$ForcedParty, dhb$PartyID)

# --- LETS LOAD THE TRUMP ERA DATA! ---
load("Trump.RData")

names(dht)[names(dht) == 'respondent_party'] <- 'ForcedParty'
names(dht)[names(dht) == 'pid1'] <- 'PartyID'

# Check work
names(dht)

# Recode Party ID
dht$PartyID <- as.character(dht$PartyID)
dht$PartyID[dht$PartyID == "1"] <- "Republican"
dht$PartyID[dht$PartyID == "2"] <- "Democrat"
dht$PartyID[dht$PartyID == "3"] <- "Independent"
dht$PartyID[dht$PartyID == "4"] <- "Other"
dht$PartyID <- factor(dht$PartyID, levels = c("Democrat", "Republican", "Independent", "Other"))

# Check Work (There is still that empty field for Forced_ID)
table(dht$PartyID)
table(dht$ForcedParty, dht$PartyID)

# Same problem. We have that empty field.

# Add a year variable with (202-) not all 16 Dem questions are missing. Otherwise make it NA for subsequent filtering
dht$year <- ifelse(rowSums(is.na(dht[, 4:19])) == 16, NA, 2020)

# Remove cases with missing on all
dht <- dht[which(dht$year==2020), ]
describe(dht)

#Let's check again if we still have that missing field in party ID
table(dht$PartyID)
table(dht$ForcedParty, dht$PartyID)

# And we still do. I have no idea what's going on here.
# These independents and third party supporters should have been forced into
# being closer to Democrats or Republicans.
# I don't want to delete them as these cases are useful when comparing
# raw partisanship and not the forced one, but it's annoying.
# Let's at least replace them with NAs
dht$ForcedParty <- ifelse(dht$ForcedParty == "", NA, dht$ForcedParty)

# Create Dummies for Cogruent (using both raw and forced pid - and a variable for independent)
dht$pidcong <- ifelse(dht$PartyID == "Republican", 1, 0)
dhb$pidcong <- ifelse(dhb$PartyID == "Democrat",   1, 0)
dht$congforced <- ifelse(dht$ForcedParty == "Republican", 1, 0)
dhb$congforced <- ifelse(dhb$ForcedParty == "Democrat",   1, 0)
dht$ind   <- ifelse(dht$PartyID == "Independent",   1, 0)
dhb$ind   <- ifelse(dhb$PartyID == "Independent",   1, 0)

# Let's Give These Better Variable Names
names(dhb) <- c("ResponseId", "PartyID", "ForcedParty",
                "m01CourtSize", "m02Filbust", "m03Gerry", "m04PurgeVRoll",
                "r05GovernExecOrd", "r06DoPplWant", "r07DoNatInt", "r08TermLim",
                "c09BanProtest", "c10ProsJour", "c11ReligClothes", "c12ObeyCourt",
                "e13ForeignHelp", "e14Concede", "e15Disqualify", "e16ImmunePros",
                "year", "pidcong", "congforced", "ind")

names(dht) <- c("ResponseId", "PartyID", "ForcedParty",
                "m01CourtSize", "m02Filbust", "m03Gerry", "m04PurgeVRoll",
                "r05GovernExecOrd", "r06DoPplWant", "r07DoNatInt", "r08TermLim",
                "c09BanProtest", "c10ProsJour", "c11ReligClothes", "c12ObeyCourt",
                "e13ForeignHelp", "e14Concede", "e15Disqualify", "e16ImmunePros",
                "year", "pidcong", "congforced", "ind")

# Bind the two datasets together
dh <- rbind(dht, dhb)

# Let's clean this mess of a dataframe up as the next operation doesn't like tidy
dh <- as.data.frame(dh)
dh <- drop_var_labs(dh)

head(dh)

# Average the DV (if any is observed, calculate an average)
dh$dv <- ifelse(rowSums(is.na(cbind
                              (dh$m01CourtSize, dh$m02Filbust, dh$m03Gerry, dh$m04PurgeVRoll,
                                dh$r05GovernExecOrd, dh$r06DoPplWant, dh$r07DoNatInt, dh$r08TermLim,
                                dh$c09BanProtest, dh$c10ProsJour, dh$c11ReligClothes, dh$c12ObeyCourt,
                                dh$e13ForeignHelp, dh$e14Concede, dh$e15Disqualify, dh$e16ImmunePros)
)) >= 15, NA,
rowMeans(cbind(
  dh$m01CourtSize, dh$m02Filbust, dh$m03Gerry, dh$m04PurgeVRoll,
  dh$r05GovernExecOrd, dh$r06DoPplWant, dh$r07DoNatInt, dh$r08TermLim,
  dh$c09BanProtest, dh$c10ProsJour, dh$c11ReligClothes, dh$c12ObeyCourt,
  dh$e13ForeignHelp, dh$e14Concede, dh$e15Disqualify, dh$e16ImmunePros
),na.rm = TRUE))

# Let's run some descriptives on the non-stacked data still
describe(dh)

# Create 4 category variable first
dh$cat[dh$ForcedParty == "Republican" & dh$year == 2020] <- "TrumpR"
dh$cat[dh$ForcedParty == "Democrat"   & dh$year == 2020] <- "TrumpD"
dh$cat[dh$ForcedParty == "Republican" & dh$year == 2021] <- "BidenR"
dh$cat[dh$ForcedParty == "Democrat"   & dh$year == 2021] <- "BidenD"

# Stack up the data by the 16 variables
long <- reshape(dh, direction = 'long',
                varying = c("m01CourtSize", "m02Filbust", "m03Gerry", "m04PurgeVRoll",
                            "r05GovernExecOrd", "r06DoPplWant", "r07DoNatInt", "r08TermLim",
                            "c09BanProtest", "c10ProsJour", "c11ReligClothes", "c12ObeyCourt",
                            "e13ForeignHelp", "e14Concede", "e15Disqualify", "e16ImmunePros"),
                timevar='QCount',
                v.names=c('d'),
                idvar='ResponseId')

# OK, we got an item 1-16 count here in QCount. But I'd like a text variable (var) for the figure.
long$var <- ""
long$var[long$QCount == "1"]  <- "Legislature to Change Size of Supreme Court (mr)"
long$var[long$QCount == "2"]  <- "Majority Party Appoints Lifetime Judges (mr)"
long$var[long$QCount == "3"]  <- "Majority Party to Draw Districts (mr)"
long$var[long$QCount == "4"]  <- "Government to Purge Voter Rolls (mr)"
long$var[long$QCount == "5"]  <- "President to Govern by Executive Order (er)"
long$var[long$QCount == "6"]  <- "President Should Do What Ppl Want (vs Follow the Law) (er)"
long$var[long$QCount == "7"]  <- "President Should Not Be Constrained by Congress & Courts (er)"
long$var[long$QCount == "8"]  <- "No Presidential Term Limits (er)"
long$var[long$QCount == "9"]  <- "Governors Allowed to Ban Protest (cl)"
long$var[long$QCount == "10"] <- "Governors to Prosecute Journalists (cl)"
long$var[long$QCount == "11"] <- "Governors Ban Religious Symbols (cl)"
long$var[long$QCount == "12"] <- "Elected Officials Disobey Biased Courts (rl)"
long$var[long$QCount == "13"] <- "Use Foreign Help in Campaign (rl)"
long$var[long$QCount == "14"] <- "Candidates Need Not Respect Election Results (rl)"
long$var[long$QCount == "15"] <- "President to Disqualify Candidates in Elections (cl)"
long$var[long$QCount == "16"] <- "President Should Be Immune from Prosecution (rl)"

long$var <- factor(long$var, levels =
                     c("Legislature to Change Size of Supreme Court (mr)",
                       "Majority Party Appoints Lifetime Judges (mr)",
                       "Majority Party to Draw Districts (mr)",
                       "Government to Purge Voter Rolls (mr)",
                       "President to Govern by Executive Order (er)",
                       "President Should Do What Ppl Want (vs Follow the Law) (er)",
                       "President Should Not Be Constrained by Congress & Courts (er)",
                       "No Presidential Term Limits (er)",
                       "Governors Allowed to Ban Protest (cl)",
                       "Governors to Prosecute Journalists (cl)",
                       "Governors Ban Religious Symbols (cl)",
                       "Elected Officials Disobey Biased Courts (rl)",
                       "Use Foreign Help in Campaign (rl)",
                       "Candidates Need Not Respect Election Results (rl)",
                       "President to Disqualify Candidates in Elections (cl)",
                       "President Should Be Immune from Prosecution (rl)", 
                       " ", "Average")
)

# remove cases with missing on ForcedParty (see above -
# only visualizing forced R+D so these need removal)
long <- long[which(!is.na(long$ForcedParty)), ]

# Turning the year into a factor for visualization
long$year <- as.factor(long$year)

# Multiply by 100 so we go from 0 100 and not 0 to 1
long$d <- long$d * 100


head(long)
length(long$ResponseId)

sumdat <- ddply(long, .(cat, var), summarise,
                d.mean = mean(d, na.rm = T),
                d.n    = length(d) - sum(is.na(d)),
)

sumdat

# Break from first submission
sumdat$pid <- NA
sumdat$party <- NA
sumdat$year <- NA
sumdat$fill <- NA
sumdat$pid[sumdat$cat == "BidenD"] <-  1
sumdat$pid[sumdat$cat == "BidenR"] <- -1
sumdat$pid[sumdat$cat == "TrumpD"] <-  1
sumdat$pid[sumdat$cat == "TrumpR"] <- -1
sumdat$party[sumdat$cat == "BidenD"] <- "D"
sumdat$party[sumdat$cat == "BidenR"] <- "R"
sumdat$party[sumdat$cat == "TrumpD"] <- "D"
sumdat$party[sumdat$cat == "TrumpR"] <- "R"
sumdat$year[sumdat$cat == "BidenD"] <- 2021.8
sumdat$year[sumdat$cat == "BidenR"] <- 2021.8
sumdat$year[sumdat$cat == "TrumpD"] <- 2020.5
sumdat$year[sumdat$cat == "TrumpR"] <- 2020.5
sumdat$fill[sumdat$cat == "BidenD"] <- "b"
sumdat$fill[sumdat$cat == "BidenR"] <- "b"
sumdat$fill[sumdat$cat == "TrumpD"] <- "a"
sumdat$fill[sumdat$cat == "TrumpR"] <- "a"
#sumdat$d.se <- sqrt(((sumdat$d.mean / 100)*(1 - sumdat$d.mean/100)) / sumdat$d.n)
sumdat$d.se <- sqrt(((sumdat$d.mean/100) * (1 - sumdat$d.mean/100)) / sumdat$d.n) * 100
sumdat$qnum <- as.numeric(sumdat$var)

print(sumdat)
pd <- position_dodge(0.1)

series <- NA
series <- is.list(series)
myplots <- vector('list', 16)

for(i in 1:16){
  figdat <- sumdat[sumdat$qnum == i, ]
  message(i)
  wrapped_title <- str_wrap(as.character(figdat$var[1]), width = 37)
  myplots[[i]] <- ggplot(figdat, aes(x = year, y = d.mean, group = party, color = party, label = party)) + 
    geom_rect(aes(xmin = -Inf, xmax = 2021, ymin = -Inf, ymax = Inf), fill = "grey92", color = NA, alpha = 0.3) +
    geom_rect(aes(xmin = 2021, xmax =  Inf, ymin = -Inf, ymax = Inf), fill = "white", color = NA, alpha = 0.3) +
    geom_errorbar(aes(ymin = (d.mean - 2 * d.se), 
                      ymax = d.mean + 2 * d.se), 
                  width = 0, position = pd) +
    geom_line(linewidth = 0.2) +
    geom_label(position = pd, size = 5, label.r = unit(.45, "lines"), label.size = 0, aes(fill = fill)) +
    scale_fill_manual(values = c("grey92", "white")) +
    scale_x_continuous(limits = c(2020, 2022), breaks = seq(2020, 2022, by = 1)) +
    scale_y_continuous(limits = c(0, 100)) +
    ggtitle(wrapped_title) + ylab(" ") + xlab(" ") +  
    theme_bw() + 
    theme(plot.title = element_text(size = 21), axis.text = element_text(size = 18), 
          panel.grid.major = element_blank(), panel.grid.minor = element_blank(), 
          legend.position = "none", strip.background = element_rect(fill = "white")) +
    scale_color_manual(name = "Party",
                       values = c('grey10', 'grey45'),
                       labels = c("Republicans", "Democrats"))
}

myplots[[2]]


# Set order
#or <- c(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16)
#or <- c(16:1)

# Without independents
#vord2 <- c(22.55, 41, 39.4, 43.38, 45.15, 31.93, 34.56, 17.14, 36.94, 36.43, 27.58, 21.97, 30.62, 31.08, 39.69, 20.12)

vord <- c(24.04, 39.39, 36.85, 41.71, 
          41.96, 30.92, 32.61, 17.93, 
          36.42, 34.42, 24.63, 23.43, 
          29.7, 31.77, 38.75, 20.18)

or <- order(vord, decreasing = T)

grid.arrange(myplots[[or[1]]],  myplots[[or[2]]],  myplots[[or[3]]],  myplots[[or[4]]], 
             myplots[[or[5]]],  myplots[[or[6]]],  myplots[[or[7]]],  myplots[[or[8]]], 
             myplots[[or[9]]],  myplots[[or[10]]], myplots[[or[11]]], myplots[[or[12]]], 
             myplots[[or[13]]], myplots[[or[14]]], myplots[[or[15]]], myplots[[or[16]]], 
             nrow=4, ncol = 4)

all.figures <- 
  arrangeGrob(myplots[[or[1]]],  myplots[[or[2]]],  myplots[[or[3]]],  myplots[[or[4]]], 
              myplots[[or[5]]],  myplots[[or[6]]],  myplots[[or[7]]],  myplots[[or[8]]], 
              myplots[[or[9]]],  myplots[[or[10]]], myplots[[or[11]]], myplots[[or[12]]], 
              myplots[[or[13]]], myplots[[or[14]]], myplots[[or[15]]], myplots[[or[16]]], 
              nrow=4, ncol = 4)

ggsave(file = "Figure2.pdf", all.figures,  width = 24.2, height = 31) #saves g
ggsave(file = "Figure2.png", all.figures,  width = 26, height = 33) #saves g
ggsave(file = "Figure2.tiff", all.figures,  dpi = 200, width = 26, height = 33) #saves g

print(xtable(sumdat[, c(1,2,3,4,9)]), include.rownames = FALSE)
